GitHub Repository: https://github.com/sastoudt/stat248project_BayDeltaTS

Finding good subsets of the data that have the most complete records.

## Load the SFEI water-quality data and take a first look at the candidate
## stations.  library() (not require()) so a missing package errors loudly.
setwd("~/Desktop/sfei")  # NOTE(review): machine-specific path; adjust per machine
sfei <- read.csv("sfeiPlusDates.csv", stringsAsFactors = FALSE)
names(sfei)
##  [1] "Date"      "Station"   "Longitude" "Latitude"  "Depth"    
##  [6] "nh4"       "bod"       "cl"        "clt"       "chl"      
## [11] "EC"        "tkn"       "no3"       "no2"       "no23"     
## [16] "doc"       "toc"       "don"       "ton"       "po4"      
## [21] "do"        "pH"        "pheo"      "tp"        "secchi"   
## [26] "sio2"      "tds"       "tss"       "vss"       "temp"     
## [31] "turb"      "sal"       "do_per"    "din"       "tn"       
## [36] "date_dec"  "doy"
library(ggplot2)
## Loading required package: ggplot2
## Middle / most connected region of interest.
sub <- subset(sfei, Station %in% c("D10", "D12", "D4", "D22", "D26"))
## EDA to find out which station/variable combos have the most complete
## records; restrict the view to the recent past (2000 onward).
ggplot(sub, aes(x = date_dec, y = din, colour = as.factor(Station))) +
  geom_line() +
  xlim(2000, 2015)
## Warning: Removed 924 rows containing missing values (geom_path).

See how much missingness there is for each station/variable combination.

## Percent of post-2000 records missing per station for each candidate
## variable.  Vectorized over stations with vapply instead of growing
## vectors with c() inside a loop.
fracMissing <- function(colname, stations, data) {
  vapply(stations, function(st) {
    d <- subset(data, date_dec > 2000 & Station == st)
    mean(is.na(d[[colname]]))  # fraction of NA rows (NaN if station has no rows)
  }, numeric(1), USE.NAMES = FALSE)
}

perStation <- unique(sfei$Station)
percentMissingChl  <- fracMissing("chl", perStation, sfei)
percentMissingDo   <- fracMissing("do", perStation, sfei)
percentMissingPheo <- fracMissing("pheo", perStation, sfei)
percentMissingTemp <- fracMissing("temp", perStation, sfei)
percentMissingSal  <- fracMissing("sal", perStation, sfei)

## Focal (middle / most connected) stations only.
focal <- which(perStation %in% c("D10", "D12", "D4", "D22", "D26"))
percentMissingChl[focal]
## [1] 0 0 0 0 0
percentMissingDo[focal]
## [1] 0.05232558 0.05202312 0.04093567 0.01149425 0.01149425
percentMissingPheo[focal]
## [1] 0 0 0 0 0
percentMissingTemp[focal]
## [1] 0.03488372 0.03468208 0.01754386 0.01149425 0.01149425
percentMissingSal[focal]
## [1] 0.04069767 0.04046243 0.01754386 0.01149425 0.01149425
## Counts (not percentages) of missing post-2000 records per focal
## station.  NOTE: the original reused the percentMissing* names for
## these counts, which was misleading; renamed to countMissing*.
countMissing <- function(colname, stations, data) {
  vapply(stations, function(st) {
    d <- subset(data, date_dec > 2000 & Station == st)
    sum(is.na(d[[colname]]))
  }, integer(1), USE.NAMES = FALSE)
}

focalStations <- c("D10", "D12", "D4", "D22", "D26")
countMissingChl  <- countMissing("chl", focalStations, sfei)
countMissingDo   <- countMissing("do", focalStations, sfei)
countMissingPheo <- countMissing("pheo", focalStations, sfei)
countMissingTemp <- countMissing("temp", focalStations, sfei)
countMissingSal  <- countMissing("sal", focalStations, sfei)

countMissingDo
## [1] 9 9 2 7 2
countMissingTemp
## [1] 6 6 2 3 2
countMissingSal
## [1] 7 7 2 3 2
## Any back-to-back missing values?  (Imputation plan is the average of
## the values before and after each gap, so consecutive NAs matter.)
## Adjacent NA row indices show up as a first difference of 1.
stationUse <- c("D10", "D12", "D4", "D22", "D26")
for (st in stationUse) {
  d <- subset(sfei, date_dec > 2000 & Station == st)
  for (v in c("do", "temp", "sal")) {
    print(sum(diff(which(is.na(d[[v]]))) == 1))
  }
}
## [1] 1
## [1] 1
## [1] 2
## [1] 2
## [1] 1
## [1] 2
## [1] 1
## [1] 1
## [1] 1
## [1] 2
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## [1] 1
## Is missingness shared across variables within a station?
## Often, but not always: compare NA row indices pairwise.
stationUse <- c("D10", "D12", "D4", "D22", "D26")
for (st in stationUse) {
  d <- subset(sfei, date_dec > 2000 & Station == st)
  print(intersect(which(is.na(d$do)), which(is.na(d$temp))))
  print(intersect(which(is.na(d$temp)), which(is.na(d$sal))))
}
## [1]  6  8 15 16 23
## [1]   6   8  15  16  23 167
## [1]   4   9  16  17 163 169
## [1]   4   9  16  17 163 169
## [1] 169 170
## [1] 169 170
## [1]  15 166 167
## [1]  15 166 167
## [1] 169 170
## [1] 169 170

Simple Imputation

## Baseline missingness in the focal-station subset before imputation.
stationUse <- c("D10", "D12", "D4", "D22", "D26")
dataBeforeImputation <- subset(sfei, date_dec >= 2000 & Station %in% stationUse)
nrow(dataBeforeImputation) ## 864
## [1] 864
sum(is.na(dataBeforeImputation$chl)) ## 0
## [1] 0
sum(is.na(dataBeforeImputation$pheo)) ## 0
## [1] 0
sum(is.na(dataBeforeImputation$do)) ## 29
## [1] 29
sum(is.na(dataBeforeImputation$sal)) ## 21
## [1] 21
sum(is.na(dataBeforeImputation$temp)) ## 19
## [1] 19

Get complete year-month combinations

## Keep only the columns used from here on, index every record by
## calendar year and month, and left-join each station onto the full
## year x month grid so missing sampling months appear as NA rows.
sfei <- dataBeforeImputation  # NOTE: overwrites the raw data from here on
## format() already returns character, so as.character() was redundant.
sfei$yr  <- as.numeric(format(as.Date(sfei$Date), "%Y"))
sfei$mon <- as.numeric(format(as.Date(sfei$Date), "%m"))

sfei <- sfei[, c("Date", "Station", "Longitude", "Latitude", "chl", "do",
                 "pheo", "sal", "temp", "date_dec", "doy", "yr", "mon")]

## Full 2000-2015 year x month grid (expand.grid already returns a
## data frame; naming the args avoids the rename step).
dataTimeFull <- expand.grid(yr = seq(2000, 2015, by = 1),
                            mon = seq(1, 12, by = 1))

## One left join per station; all.x = TRUE keeps unobserved grid months.
mergeStationGrid <- function(station) {
  merge(dataTimeFull, subset(sfei, Station == station),
        by = c("yr", "mon"), all.x = TRUE)
}
D10f <- mergeStationGrid("D10")
D12f <- mergeStationGrid("D12")
D22f <- mergeStationGrid("D22")
D26f <- mergeStationGrid("D26")
D4f  <- mergeStationGrid("D4")
## Now fill in the bookkeeping columns (Date, Station, coordinates,
## decimal time, day-of-year) for grid months that had no observation:
## fabricate a first-of-month Date and derive the time variables from it.
## The five per-station copy-paste paragraphs are collapsed into one helper.
library(lubridate)
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## NOTE(review): lubridate is loaded but dec_time() comes from WRTDStidal;
## lubridate may be an indirect dependency -- confirm before removing.
library(WRTDStidal)
## Loading required package: WRTDStidal

fillStationMeta <- function(df, station) {
  ## Unobserved grid rows have NA Date: invent the 1st of that month.
  noObs <- which(is.na(df$Date))
  df$Date[noObs] <- paste(df$yr[noObs], df$mon[noObs], "01", sep = "-")
  df$Date <- as.Date(df$Date)
  df$Station[is.na(df$Station)] <- station
  ## All rows of one station share coordinates, so copy from row 1.
  ## NOTE(review): assumes row 1 holds an observed record; D22's row 1
  ## does not, and is patched later in the script.
  df$Longitude[is.na(df$Longitude)] <- df$Longitude[1]
  df$Latitude[is.na(df$Latitude)] <- df$Latitude[1]
  ## Derive decimal time and day-of-year from the fabricated Dates.
  needDec <- which(is.na(df$date_dec))
  df$date_dec[needDec] <- as.numeric(dec_time(df$Date[needDec])$dec_time)
  needDoy <- which(is.na(df$doy))
  df$doy[needDoy] <- as.numeric(strftime(df$Date[needDoy], format = "%j"))
  df
}

D10f <- fillStationMeta(D10f, "D10")
D12f <- fillStationMeta(D12f, "D12")
D22f <- fillStationMeta(D22f, "D22")
D26f <- fillStationMeta(D26f, "D26")
D4f  <- fillStationMeta(D4f, "D4")
### Now impute the actual measured variables.

## Replace each NA with the mean of its immediate neighbours (previous
## and next month).  This reproduces the original per-station loops,
## including their sequential behaviour: gaps are filled in index order,
## so an earlier imputed value can feed the next gap, and a gap whose
## two neighbours are both missing becomes NaN (D22 "do" keeps one such
## hole, patched after trimming).
## BUG FIX: the original `for (i in 1:length(toImpute))` produced an NA
## subscript (1:0) and errored whenever a variable had zero NAs; the
## direct iteration over which(is.na(x)) simply does nothing in that case.
imputeNeighborMean <- function(x) {
  for (idx in which(is.na(x))) {
    before <- if (idx > 1) x[idx - 1] else NA
    after  <- if (idx < length(x)) x[idx + 1] else NA
    x[idx] <- mean(c(before, after), na.rm = TRUE)
  }
  x
}

imputeVars <- c("chl", "do", "pheo", "sal", "temp")

for (v in imputeVars) D10f[[v]] <- imputeNeighborMean(D10f[[v]])
colSums(is.na(D10f[imputeVars]))  # expect all 0
for (v in imputeVars) D12f[[v]] <- imputeNeighborMean(D12f[[v]])
colSums(is.na(D12f[imputeVars]))  # expect all 0
for (v in imputeVars) D22f[[v]] <- imputeNeighborMean(D22f[[v]])
colSums(is.na(D22f[imputeVars]))  # "do" keeps 1 (both neighbours missing)
for (v in imputeVars) D26f[[v]] <- imputeNeighborMean(D26f[[v]])
colSums(is.na(D26f[imputeVars]))  # expect all 0
for (v in imputeVars) D4f[[v]] <- imputeNeighborMean(D4f[[v]])
colSums(is.na(D4f[imputeVars]))   # expect all 0
###
## Inspect the tail: everything after the last real observation (June
## 2015) is pure carry-forward imputation.
tail(D10f, 20)
##       yr mon       Date Station Longitude Latitude  chl     do pheo   sal
## 173 2014   5 2014-05-01     D10 -121.9183 38.04631 1.24 10.000 1.46 13.50
## ...
## 186 2015   6 2015-06-10     D10 -121.9183 38.04631 3.67  7.850 1.10  9.40
## 187 2015   7 2015-07-01     D10 -121.9183 38.04631 3.67  7.850 1.10  9.40
## ... (July-December 2015 rows repeat the June values)

## Trim July 2015 onward.  Numeric comparison replaces the original
## character coercion (yr == "2015"), and max(which(...)) is robust if a
## month ever has more than one record (the original 1:which(...) assumed
## exactly one match).
trimAfterJune2015 <- function(df) {
  df[seq_len(max(which(df$yr == 2015 & df$mon == 6))), ]
}
D10f <- trimAfterJune2015(D10f)
D12f <- trimAfterJune2015(D12f)
D22f <- trimAfterJune2015(D22f)
D26f <- trimAfterJune2015(D26f)
D4f  <- trimAfterJune2015(D4f)

## D22's first grid row was never observed, so its coordinates were not
## filled (row 1 was itself the NA template) and one "do" value is still
## missing; patch row 1 from the adjacent row.
## NOTE(review): presumes the leftover D22 "do" hole sits in row 1, as
## the original manual patch implies -- verify against the data.
D22f$Longitude[1] <- D22f$Longitude[2]
D22f$Latitude[1] <- D22f$Latitude[2]
D22f$do[1] <- D22f$do[2]

setwd("~/UC_Berkeley/Semester_4/timeSeries")  # NOTE(review): machine-specific path
afterImputation <- rbind(D10f, D12f, D22f, D26f, D4f)
write.csv(afterImputation, "sfeiDataForProject.csv", row.names = FALSE)

De-trend, de-seasonalize, variance stabilization

## Reload the imputed data set and split it out by station.
setwd("~/UC_Berkeley/Semester_4/timeSeries")
sfeiData <- read.csv("sfeiDataForProject.csv", stringsAsFactors = FALSE)
names(sfeiData)
##  [1] "yr"        "mon"       "Date"      "Station"   "Longitude"
##  [6] "Latitude"  "chl"       "do"        "pheo"      "sal"      
## [11] "temp"      "date_dec"  "doy"
## Response variables: "chl" "do" "pheo" "sal" "temp"

length(unique(sfeiData$Station)) ## 5
## [1] 5
unique(sfeiData$Station) ## "D10" "D12" "D22" "D26" "D4"
## [1] "D10" "D12" "D22" "D26" "D4"
byStation <- split(sfeiData, sfeiData$Station)
D10 <- byStation[["D10"]]
D12 <- byStation[["D12"]]
D22 <- byStation[["D22"]]
D26 <- byStation[["D26"]]
D4 <- byStation[["D4"]]

plot(D10$chl, type = "l")

library(mgcv)
## Loading required package: mgcv
## Loading required package: nlme
## This is mgcv 1.8-15. For overview type 'help("mgcv-package")'.
library(parallel)
## Loading required package: parallel
## Use up to 4 workers, but never more than the machine actually has
## (the original hard-coded 4, which could oversubscribe a smaller box).
nc <- min(4, detectCores())
if (detectCores() > 1) { ## no point otherwise
  cl <- makeCluster(nc)
} else {
  cl <- NULL
}
#### chl ####
## Fit a per-station decomposition for chlorophyll: a long-term trend in
## decimal time (date_dec) plus a seasonal cycle in day-of-year (doy).
## bs = "cs" are shrinkage cubic splines, so unneeded smooths can shrink
## toward zero; by = Station gives each station its own pair of smooths.
## Station must be a factor for the by= smooths and the intercept terms.
sfeiData$Station=as.factor(sfeiData$Station)

## bam() is mgcv's big-data GAM fitter; cluster = cl parallelizes the fit.
chlSeasonalTimeTrend=bam(chl~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)

summary(chlSeasonalTimeTrend)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## chl ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) + 
##     s(doy, bs = "cs", by = Station, k = 25)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.25077    0.21501  10.468   <2e-16 ***
## StationD12   0.15245    0.30404   0.501   0.6162    
## StationD22   0.62694    0.30405   2.062   0.0395 *  
## StationD26  -0.10304    0.30409  -0.339   0.7348    
## StationD4    0.07352    0.30405   0.242   0.8090    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df     F  p-value    
## s(date_dec):StationD10 6.399e-06     24 0.000 0.749071    
## s(date_dec):StationD12 5.068e-05     24 0.000 0.427507    
## s(date_dec):StationD22 1.174e+00     24 0.131 0.065529 .  
## s(date_dec):StationD26 2.027e+00     24 0.316 0.012899 *  
## s(date_dec):StationD4  1.191e-05     24 0.000 0.475189    
## s(doy):StationD10      3.499e+00     24 0.982 1.47e-05 ***
## s(doy):StationD12      2.613e+00     24 0.574 0.000860 ***
## s(doy):StationD22      3.483e+00     24 0.606 0.001719 ** 
## s(doy):StationD26      2.730e+00     24 0.543 0.001524 ** 
## s(doy):StationD4       2.929e+00     24 0.712 0.000218 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0938   Deviance explained = 11.6%
## fREML = 2342.9  Scale est. = 8.596     n = 930
## Diagnostics: convergence, model rank, and whether the basis dimension
## k = 25 is large enough for each smooth (low k-index flags trouble).
gam.check(chlSeasonalTimeTrend)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 16 iterations.
## Gradient range [-7.344914e-06,1.798371e-05]
## (score 2342.87 & scale 8.595965).
## Hessian positive definite, eigenvalue range [2.343281e-06,462.5285].
## Model rank =  245 / 245 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf  k-index p-value
## s(date_dec):StationD10 2.40e+01 6.40e-06 8.32e-01    0.00
## s(date_dec):StationD12 2.40e+01 5.07e-05 8.32e-01    0.02
## s(date_dec):StationD22 2.40e+01 1.17e+00 8.32e-01    0.00
## s(date_dec):StationD26 2.40e+01 2.03e+00 8.32e-01    0.00
## s(date_dec):StationD4  2.40e+01 1.19e-05 8.32e-01    0.00
## s(doy):StationD10      2.40e+01 3.50e+00 9.77e-01    0.10
## s(doy):StationD12      2.40e+01 2.61e+00 9.77e-01    0.12
## s(doy):StationD22      2.40e+01 3.48e+00 9.77e-01    0.19
## s(doy):StationD26      2.40e+01 2.73e+00 9.77e-01    0.14
## s(doy):StationD4       2.40e+01 2.93e+00 9.77e-01    0.11
plot(chlSeasonalTimeTrend)

## date_dec flat for D10, D12, D4; increasing for D22; curved upward for D26.
## doy peak in the 100s for D10, D12, D4; plateau from 100 to 250 for D22, D26.

## Attach the GAM fit and residuals.  fitted()/residuals() replace the
## original `$fitted` / `$resid`, which relied on partial name matching
## into the model object (values identical for a gaussian identity fit).
sfeiData$predGAMchl <- fitted(chlSeasonalTimeTrend)
sfeiData$residGAMchl <- residuals(chlSeasonalTimeTrend)

D10 <- subset(sfeiData, Station == "D10")
D12 <- subset(sfeiData, Station == "D12")
D22 <- subset(sfeiData, Station == "D22")
D26 <- subset(sfeiData, Station == "D26")
D4 <- subset(sfeiData, Station == "D4")

plot(D10$chl, type = "l")
lines(D10$residGAMchl, col = "red")
lines(D10$predGAMchl, col = "blue")

## Check residual stationarity on the raw and shifted-log scales.
shiftedLog <- function(r) log(r + abs(min(r)))
par(mfrow = c(2, 1))
plot(D10$residGAMchl, type = "l") ## check for stationarity
plot(shiftedLog(D10$residGAMchl), type = "l")

plot(D12$residGAMchl, type = "l")
plot(shiftedLog(D12$residGAMchl), type = "l")

plot(D22$residGAMchl, type = "l")
plot(shiftedLog(D22$residGAMchl), type = "l")

plot(D26$residGAMchl, type = "l")
plot(shiftedLog(D26$residGAMchl), type = "l")

plot(D4$residGAMchl, type = "l")
plot(shiftedLog(D4$residGAMchl), type = "l")

## Variance-stabilizing transform, offset by 1 so each station's minimum
## residual maps to log(1) = 0.
shiftedLog1 <- function(r) log(r + abs(min(r)) + 1)
D10$residGAMchlTransform <- shiftedLog1(D10$residGAMchl)
D12$residGAMchlTransform <- shiftedLog1(D12$residGAMchl)
D22$residGAMchlTransform <- shiftedLog1(D22$residGAMchl)
D26$residGAMchlTransform <- shiftedLog1(D26$residGAMchl)
D4$residGAMchlTransform <- shiftedLog1(D4$residGAMchl)

sfeiData <- rbind(D10, D12, D22, D26, D4)
## Same per-station trend (date_dec) + seasonal (doy) decomposition,
## now for dissolved oxygen; fitted in parallel on the cluster cl.
doSeasonalTimeTrend=bam(do~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(doSeasonalTimeTrend)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## do ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) + 
##     s(doy, bs = "cs", by = Station, k = 25)
## 
## Parametric coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  9.048376   0.036353 248.906  < 2e-16 ***
## StationD12  -0.071472   0.051404  -1.390    0.165    
## StationD22   0.019413   0.051405   0.378    0.706    
## StationD26  -0.225266   0.051433  -4.380 1.34e-05 ***
## StationD4   -0.003852   0.051406  -0.075    0.940    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf Ref.df      F  p-value    
## s(date_dec):StationD10 13.245     24  2.253 1.58e-07 ***
## s(date_dec):StationD12 12.734     24  2.263 8.92e-08 ***
## s(date_dec):StationD22 14.289     24  2.911 4.26e-10 ***
## s(date_dec):StationD26 14.006     24  2.700 3.00e-09 ***
## s(date_dec):StationD4  12.954     24  1.888 5.27e-06 ***
## s(doy):StationD10       5.941     24 10.532  < 2e-16 ***
## s(doy):StationD12       5.417     24 11.220  < 2e-16 ***
## s(doy):StationD22       5.491     24 10.708  < 2e-16 ***
## s(doy):StationD26       6.466     24 14.966  < 2e-16 ***
## s(doy):StationD4        5.808     24 11.506  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.651   Deviance explained = 68.8%
## fREML = 852.35  Scale est. = 0.24566   n = 930
## Diagnostics: convergence and basis-dimension adequacy.  NOTE(review):
## the very low k-index (0.349) for the date_dec smooths suggests k may
## be too small there -- worth revisiting.
gam.check(doSeasonalTimeTrend)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-5.473219e-06,8.978749e-06]
## (score 852.3495 & scale 0.2456584).
## Hessian positive definite, eigenvalue range [1.511284,463.0843].
## Model rank =  245 / 245 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value
## s(date_dec):StationD10 24.000 13.245   0.349       0
## s(date_dec):StationD12 24.000 12.734   0.349       0
## s(date_dec):StationD22 24.000 14.289   0.349       0
## s(date_dec):StationD26 24.000 14.006   0.349       0
## s(date_dec):StationD4  24.000 12.954   0.349       0
## s(doy):StationD10      24.000  5.941   0.811       0
## s(doy):StationD12      24.000  5.417   0.811       0
## s(doy):StationD22      24.000  5.491   0.811       0
## s(doy):StationD26      24.000  6.466   0.811       0
## s(doy):StationD4       24.000  5.808   0.811       0
plot(doSeasonalTimeTrend)

## date_dec slightly concave for D10, D12, D22, D4; flatter for D26.
## doy peaks near day 50, minimum near day 200, at every station.

## Attach fit and residuals via the extractors (the original used
## `$fitted` / `$resid` partial matching; values are identical).
sfeiData$predGAMdo <- fitted(doSeasonalTimeTrend)
sfeiData$residGAMdo <- residuals(doSeasonalTimeTrend)

D10 <- subset(sfeiData, Station == "D10")
D12 <- subset(sfeiData, Station == "D12")
D22 <- subset(sfeiData, Station == "D22")
D26 <- subset(sfeiData, Station == "D26")
D4 <- subset(sfeiData, Station == "D4")

plot(D10$do, type = "l")
lines(D10$residGAMdo, col = "red")
lines(D10$predGAMdo, col = "blue")

## Log transforms don't seem to help for DO -- stick with the original scale.
par(mfrow = c(2, 1))
plot(D10$residGAMdo, type = "l") ## check for stationarity
plot(log(D10$residGAMdo + abs(min(D10$residGAMdo))), type = "l")

## Box-Cox lambda search on the shifted residuals: if lambda = 1 falls
## inside the 95% interval, leave the series untransformed.
## http://stackoverflow.com/questions/26617587/finding-optimal-lambda-for-box-cox-transform-in-r
library(MASS)
## Loading required package: MASS
boxcoxLambdaRange <- function(r) {
  ## Shift so all values are >= 1, fit an intercept-only model, and
  ## return the 95% confidence range for lambda.
  out <- boxcox(lm(r + abs(min(r)) + 1 ~ 1))
  range(out$x[out$y > max(out$y) - qchisq(0.95, 1) / 2])
}
boxcoxLambdaRange(D10$residGAMdo) ## one is consistent, keep as is
## [1] 0.7474747 1.7979798
plot(D12$residGAMdo, type = "l")

plot(log(D12$residGAMdo + abs(min(D12$residGAMdo))), type = "l")

boxcoxLambdaRange(D12$residGAMdo) ## one is consistent, keep as is
## [1] 0.3838384 1.3535354
plot(D22$residGAMdo, type = "l")
plot(log(D22$residGAMdo + abs(min(D22$residGAMdo))), type = "l")

boxcoxLambdaRange(D22$residGAMdo) ## one is consistent, keep as is
## [1] 0.6262626 1.5151515
plot(D26$residGAMdo, type = "l")

plot(log(D26$residGAMdo + abs(min(D26$residGAMdo))), type = "l")

boxcoxLambdaRange(D26$residGAMdo) ## one is consistent, keep as is
## [1] 0.6262626 1.7171717
plot(D4$residGAMdo, type = "l")
plot(log(D4$residGAMdo + abs(min(D4$residGAMdo))), type = "l")

boxcoxLambdaRange(D4$residGAMdo)
## [1] 0.9090909 1.9595960
library(forecast)
## Loading required package: forecast
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: timeDate
## This is forecast 7.3
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:nlme':
## 
##     getResponse
plot(D4$residGAMdo, type = "l")

## Sanity check: applying lambda = 1.5 barely changes the shape.
trans.vector <- BoxCox(D4$residGAMdo + abs(min(D4$residGAMdo)) + 1, 1.5)
plot(trans.vector, type = "l") ## basically the same

#### pheo ####

## Per-station trend (date_dec) + seasonal (doy) GAM for pheophytin,
## mirroring the chl and do models.
pheoSeasonalTimeTrend <- bam(pheo~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)

summary(pheoSeasonalTimeTrend)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## pheo ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) + 
##     s(doy, bs = "cs", by = Station, k = 25)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.29127    0.05781  22.338   <2e-16 ***
## StationD12  -0.04743    0.08174  -0.580   0.5619    
## StationD22  -0.02635    0.08174  -0.322   0.7473    
## StationD26  -0.26559    0.08175  -3.249   0.0012 ** 
## StationD4   -0.05826    0.08174  -0.713   0.4762    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df     F p-value    
## s(date_dec):StationD10 3.944e-06     24 0.000  0.9462    
## s(date_dec):StationD12 4.283e-06     24 0.000  0.7990    
## s(date_dec):StationD22 1.537e+00     24 0.256  0.0142 *  
## s(date_dec):StationD26 1.512e+00     24 0.131  0.1115    
## s(date_dec):StationD4  3.518e-06     24 0.000  0.7584    
## s(doy):StationD10      4.253e+00     24 1.091   1e-05 ***
## s(doy):StationD12      1.886e+00     24 0.341  0.0071 ** 
## s(doy):StationD22      9.002e-05     24 0.000  0.3385    
## s(doy):StationD26      2.296e+00     24 0.345  0.0124 *  
## s(doy):StationD4       2.191e+00     24 0.291  0.0233 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.0687   Deviance explained = 8.64%
## fREML =   1121  Scale est. = 0.62131   n = 930
gam.check(pheoSeasonalTimeTrend)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 18 iterations.
## Gradient range [-1.809772e-06,4.248297e-06]
## (score 1120.987 & scale 0.6213097).
## Hessian positive definite, eigenvalue range [1.349349e-06,462.5197].
## Model rank =  245 / 245 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf  k-index p-value
## s(date_dec):StationD10 2.40e+01 3.94e-06 7.86e-01    0.00
## s(date_dec):StationD12 2.40e+01 4.28e-06 7.86e-01    0.00
## s(date_dec):StationD22 2.40e+01 1.54e+00 7.86e-01    0.00
## s(date_dec):StationD26 2.40e+01 1.51e+00 7.86e-01    0.00
## s(date_dec):StationD4  2.40e+01 3.52e-06 7.86e-01    0.00
## s(doy):StationD10      2.40e+01 4.25e+00 9.39e-01    0.05
## s(doy):StationD12      2.40e+01 1.89e+00 9.39e-01    0.04
## s(doy):StationD22      2.40e+01 9.00e-05 9.39e-01    0.04
## s(doy):StationD26      2.40e+01 2.30e+00 9.39e-01    0.04
## s(doy):StationD4       2.40e+01 2.19e+00 9.39e-01    0.06
plot(pheoSeasonalTimeTrend)

## date_dec flat for D10, D12, D4; downward trend D22; down-then-up D26.
## doy: peak ~125, low ~275 for D10; slight concave peak around 150 for
## D12, D26, D4 (even slighter for D22).

## Attach fit and residuals via the extractors (the original used
## `$fitted` / `$resid` partial matching; values are identical).
sfeiData$predGAMpheo <- fitted(pheoSeasonalTimeTrend)
sfeiData$residGAMpheo <- residuals(pheoSeasonalTimeTrend)

D10 <- subset(sfeiData, Station == "D10")
D12 <- subset(sfeiData, Station == "D12")
D22 <- subset(sfeiData, Station == "D22")
D26 <- subset(sfeiData, Station == "D26")
D4 <- subset(sfeiData, Station == "D4")

plot(D10$pheo, type = "l")
lines(D10$residGAMpheo, col = "red")
lines(D10$predGAMpheo, col = "blue")

par(mfrow = c(2, 1))
plot(D10$residGAMpheo, type = "l") ## check for stationarity
plot(log(D10$residGAMpheo + abs(min(D10$residGAMpheo))), type = "l")

plot(D12$residGAMpheo, type = "l")
plot(log(D12$residGAMpheo + abs(min(D12$residGAMpheo))), type = "l")

plot(D22$residGAMpheo, type = "l")
plot(log(D22$residGAMpheo + abs(min(D22$residGAMpheo))), type = "l")

plot(D26$residGAMpheo, type = "l")
plot(log(D26$residGAMpheo + abs(min(D26$residGAMpheo))), type = "l")

plot(D4$residGAMpheo, type = "l")
plot(log(D4$residGAMpheo + abs(min(D4$residGAMpheo))), type = "l")

## Shifted-log variance-stabilizing transform, offset by 1 so each
## station's minimum residual maps to log(1) = 0.
shiftLog1 <- function(r) log(r + abs(min(r)) + 1)
D10$residGAMpheoTransform <- shiftLog1(D10$residGAMpheo)
D12$residGAMpheoTransform <- shiftLog1(D12$residGAMpheo)
D22$residGAMpheoTransform <- shiftLog1(D22$residGAMpheo)
D26$residGAMpheoTransform <- shiftLog1(D26$residGAMpheo)
D4$residGAMpheoTransform <- shiftLog1(D4$residGAMpheo)
sfeiData <- rbind(D10, D12, D22, D26, D4)
## Salinity: station-specific long-term (date_dec) and seasonal (doy) cubic
## shrinkage smooths ("cs"), k=25 basis functions each.
## NOTE(review): cl is presumably a parallel cluster created earlier in the
## file for bam's cluster argument -- not visible in this section.
salSeasonalTimeTrend=bam(sal~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(salSeasonalTimeTrend)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## sal ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) + 
##     s(doy, bs = "cs", by = Station, k = 25)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.1895     0.1332   38.97   <2e-16 ***
## StationD12   -3.4021     0.1883  -18.07   <2e-16 ***
## StationD22   -4.0470     0.1883  -21.49   <2e-16 ***
## StationD26   -5.0225     0.1883  -26.68   <2e-16 ***
## StationD4    -2.2796     0.1883  -12.11   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                              edf Ref.df      F  p-value    
## s(date_dec):StationD10 1.716e+01     24 15.183  < 2e-16 ***
## s(date_dec):StationD12 5.482e+00     24  2.185 6.91e-11 ***
## s(date_dec):StationD22 4.847e+00     24  2.100 7.97e-11 ***
## s(date_dec):StationD26 1.084e-05     24  0.000        1    
## s(date_dec):StationD4  1.265e+01     24  9.474  < 2e-16 ***
## s(doy):StationD10      6.852e+00     24 16.734  < 2e-16 ***
## s(doy):StationD12      3.236e+00     24  2.203 1.32e-12 ***
## s(doy):StationD22      2.834e+00     24  1.886 3.14e-11 ***
## s(doy):StationD26      1.054e-05     24  0.000        1    
## s(doy):StationD4       4.802e+00     24  6.041  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =    0.7   Deviance explained =   72%
## fREML = 1979.1  Scale est. = 3.2959    n = 930
## Convergence and basis-dimension diagnostics for the salinity GAM.
gam.check(salSeasonalTimeTrend)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 15 iterations.
## Gradient range [-5.244617e-06,3.029862e-07]
## (score 1979.144 & scale 3.295859).
## Hessian positive definite, eigenvalue range [5.143902e-06,462.824].
## Model rank =  245 / 245 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                              k'      edf  k-index p-value
## s(date_dec):StationD10 2.40e+01 1.72e+01 6.27e-01       0
## s(date_dec):StationD12 2.40e+01 5.48e+00 6.27e-01       0
## s(date_dec):StationD22 2.40e+01 4.85e+00 6.27e-01       0
## s(date_dec):StationD26 2.40e+01 1.08e-05 6.27e-01       0
## s(date_dec):StationD4  2.40e+01 1.26e+01 6.27e-01       0
## s(doy):StationD10      2.40e+01 6.85e+00 8.48e-01       0
## s(doy):StationD12      2.40e+01 3.24e+00 8.48e-01       0
## s(doy):StationD22      2.40e+01 2.83e+00 8.48e-01       0
## s(doy):StationD26      2.40e+01 1.05e-05 8.48e-01       0
## s(doy):StationD4       2.40e+01 4.80e+00 8.48e-01       0
plot(salSeasonalTimeTrend)

## date dec D10 wiggly peak 2008, same wiggles but dampened D12, D4
## concave plateau D22
## flat D26
## doy min peak 75, peak 325 D10, D4 same but dampened D12, D22
## flat D26

## Attach the salinity GAM's fitted values and residuals to the data.
sfeiData$predGAMsal=salSeasonalTimeTrend$fitted
sfeiData$residGAMsal=salSeasonalTimeTrend$resid


## Per-station salinity residual checks: plot for stationarity, then use a
## Box-Cox profile likelihood to decide whether a transform is warranted.
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")

plot(D10$sal,type="l") 
lines(D10$residGAMsal,col="red")
lines(D10$predGAMsal,col="blue")

## these don't seem to help, stick to original
par(mfrow=c(2,1))
plot(D10$residGAMsal,type="l") ## check for stationarity
plot(log(D10$residGAMsal+abs(min(D10$residGAMsal))),type="l")

## Box-Cox on the shifted (positive) residuals; the approximate 95% CI for
## lambda is the range of x within qchisq(0.95,1)/2 of the max log-likelihood.
out <- boxcox(lm(D10$residGAMsal+abs(min(D10$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## one is consistent, keep this the same
## [1] 0.7878788 1.3535354
plot(D12$residGAMsal,type="l")

plot(log(D12$residGAMsal+abs(min(D12$residGAMsal))),type="l")

out <- boxcox(lm(D12$residGAMsal+abs(min(D12$residGAMsal))+1~1))

range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 
## [1] 0.3030303 0.8282828
## Try the suggested lambda = 0.5 visually before committing to it.
trans.vector = BoxCox( D12$residGAMsal+abs(min(D12$residGAMsal))+1, .5)
plot(D12$residGAMsal,type="l")
plot(trans.vector,type="l") ## looks the same

plot(D22$residGAMsal,type="l")
plot(log(D22$residGAMsal+abs(min(D22$residGAMsal))),type="l")

out <- boxcox(lm(D22$residGAMsal+abs(min(D22$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## this suggests log is better, actually change
## [1] -0.1818182  0.1010101
plot(D26$residGAMsal,type="l")

plot(log(D26$residGAMsal+abs(min(D26$residGAMsal))),type="l")

out <- boxcox(lm(D26$residGAMsal+abs(min(D26$residGAMsal))+1~1))

range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 
## [1] -2.00000 -1.59596
trans.vector = BoxCox( D26$residGAMsal+abs(min(D26$residGAMsal))+1, -1.75)
plot(D26$residGAMsal,type="l")
plot(trans.vector,type="l") ## looks the same

plot(D4$residGAMsal,type="l")
plot(log(D4$residGAMsal+abs(min(D4$residGAMsal))),type="l")

out <- boxcox(lm(D4$residGAMsal+abs(min(D4$residGAMsal))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.6262626 1.1919192
## Only D22's CI covers lambda = 0, so only D22 gets the log transform.
D22$residGAMsalTransform=log(D22$residGAMsal+abs(min(D22$residGAMsal))+1)

require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:nlme':
## 
##     collapse
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## bind_rows (not rbind) because only D22 gained residGAMsalTransform;
## bind_rows fills that column with NA for the other four stations.
sfeiData=bind_rows(D10,D12,D22,D26,D4)

## Temperature: same station-specific trend + seasonal smooth structure as
## for salinity above.
tempSeasonalTimeTrend=bam(temp~Station+s(date_dec,bs="cs",by=Station,k=25)+s(doy,bs="cs",by=Station,k=25),data=sfeiData,cluster=cl)
summary(tempSeasonalTimeTrend)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## temp ~ Station + s(date_dec, bs = "cs", by = Station, k = 25) + 
##     s(doy, bs = "cs", by = Station, k = 25)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  15.9100     0.1061 150.017   <2e-16 ***
## StationD12    0.2163     0.1500   1.442    0.150    
## StationD22    0.1298     0.1500   0.866    0.387    
## StationD26    0.2958     0.1501   1.971    0.049 *  
## StationD4     0.1274     0.1500   0.849    0.396    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                           edf Ref.df     F p-value    
## s(date_dec):StationD10 19.609     24 12.71  <2e-16 ***
## s(date_dec):StationD12 19.075     24 11.21  <2e-16 ***
## s(date_dec):StationD22 20.329     24 14.04  <2e-16 ***
## s(date_dec):StationD26 20.031     24 12.95  <2e-16 ***
## s(date_dec):StationD4  19.684     24 12.64  <2e-16 ***
## s(doy):StationD10       7.472     24 52.31  <2e-16 ***
## s(doy):StationD12       7.726     24 60.26  <2e-16 ***
## s(doy):StationD22       7.875     24 60.40  <2e-16 ***
## s(doy):StationD26       7.573     24 68.79  <2e-16 ***
## s(doy):StationD4        7.679     24 56.06  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.906   Deviance explained =   92%
## fREML = 1954.4  Scale est. = 2.0903    n = 930
## Convergence and basis-dimension diagnostics for the temperature GAM.
gam.check(tempSeasonalTimeTrend)

## 
## Method: fREML   Optimizer: perf newton
## full convergence after 6 iterations.
## Gradient range [-7.594998e-06,6.200114e-06]
## (score 1954.41 & scale 2.090252).
## Hessian positive definite, eigenvalue range [2.564866,463.7284].
## Model rank =  245 / 245 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                            k'    edf k-index p-value
## s(date_dec):StationD10 24.000 19.609   0.184       0
## s(date_dec):StationD12 24.000 19.075   0.184       0
## s(date_dec):StationD22 24.000 20.329   0.184       0
## s(date_dec):StationD26 24.000 20.031   0.184       0
## s(date_dec):StationD4  24.000 19.684   0.184       0
## s(doy):StationD10      24.000  7.472   0.618       0
## s(doy):StationD12      24.000  7.726   0.618       0
## s(doy):StationD22      24.000  7.875   0.618       0
## s(doy):StationD26      24.000  7.573   0.618       0
## s(doy):StationD4       24.000  7.679   0.618       0
plot(tempSeasonalTimeTrend)

## datedec nearly flat D10, D12, D26, D4
## flat D22
## doy peak 200 all

## Attach the temperature GAM's fitted values and residuals to the data.
sfeiData$predGAMtemp=tempSeasonalTimeTrend$fitted
sfeiData$residGAMtemp=tempSeasonalTimeTrend$resid


## Per-station temperature residual checks, mirroring the salinity section.
D10<-subset(sfeiData,Station=="D10")
D12<-subset(sfeiData,Station=="D12")
D22<-subset(sfeiData,Station=="D22")
D26<-subset(sfeiData,Station=="D26")
D4<-subset(sfeiData,Station=="D4")

plot(D10$temp,type="l")
lines(D10$residGAMtemp,col="red")
lines(D10$predGAMtemp,col="blue")

## doesn't really help, stick with original
par(mfrow=c(2,1))
plot(D10$residGAMtemp,type="l") ## check for stationarity
plot(log(D10$residGAMtemp+abs(min(D10$residGAMtemp))),type="l")

## Box-Cox checks: every station's lambda CI covers 1, so no temperature
## residual series gets a transform.
out <- boxcox(lm(D10$residGAMtemp+abs(min(D10$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.7070707 1.3131313
plot(D12$residGAMtemp,type="l")

plot(log(D12$residGAMtemp+abs(min(D12$residGAMtemp))),type="l")

out <- boxcox(lm(D12$residGAMtemp+abs(min(D12$residGAMtemp))+1~1))

range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.8686869 1.5151515
plot(D22$residGAMtemp,type="l")
plot(log(D22$residGAMtemp+abs(min(D22$residGAMtemp))),type="l")

out <- boxcox(lm(D22$residGAMtemp+abs(min(D22$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.8686869 1.5151515
plot(D26$residGAMtemp,type="l")

plot(log(D26$residGAMtemp+abs(min(D26$residGAMtemp))),type="l")

out <- boxcox(lm(D26$residGAMtemp+abs(min(D26$residGAMtemp))+1~1))

range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.9494949 1.6363636
plot(D4$residGAMtemp,type="l")
plot(log(D4$residGAMtemp+abs(min(D4$residGAMtemp))),type="l")

out <- boxcox(lm(D4$residGAMtemp+abs(min(D4$residGAMtemp))+1~1))
range(out$x[out$y > max(out$y)-qchisq(0.95,1)/2]) ## 1 is consistent, keep
## [1] 0.7878788 1.4343434

## Save the per-station data (GAM residuals + any transforms) to disk for the
## pairwise time-domain and frequency-domain analyses below.
setwd("~/UC_Berkeley/Semester_4/timeSeries")
write.csv(D10,"D10data.csv",row.names=F)
write.csv(D12,"D12data.csv",row.names=F)
write.csv(D22,"D22data.csv",row.names=F)
write.csv(D26,"D26data.csv",row.names=F)
write.csv(D4,"D4data.csv",row.names=F)

Time Domain

acfTest=function(stationData,nutrient){
  ## Permutation-style test for the strongest autocorrelation (lags 1-12) of
  ## one station's GAM residual series. The null distribution is the max-|acf|
  ## statistic computed on every circular shift of the series, which keeps the
  ## marginal distribution but breaks the temporal alignment.
  ## Args:
  ##   stationData - per-station data frame holding a residGAM<nutrient>
  ##                 (or residGAM<nutrient>Transform) column
  ##   nutrient    - variable name stub, e.g. "chl", "do", "pheo", "sal", "temp"
  ## Returns list(maxLag, pVal, minPval) where minPval = 1/nrow is the
  ## smallest attainable empirical p-value.
  ## Prefer the transformed residual column when one was created.
  if(sum(grepl(paste0(nutrient,"Transform"),names(stationData)))>0){
    varName=paste0("residGAM",nutrient,"Transform")
  }else{
    varName=paste0("residGAM",nutrient)
  }
  
  n=nrow(stationData)
  series=stationData[,varName]
  ## Null distribution over all n-1 circular shifts; preallocated via vapply
  ## instead of growing empP with c() inside the loop.
  empP=vapply(2:n,function(i){
    shifted=series[c(i:n,1:(i-1))]
    acfShift=acf(shifted,lag.max=12,plot=F)$acf[2:13]  ## drop lag 0, keep 1..12
    acfShift[which.max(abs(acfShift))]
  },numeric(1))
  
  test=acf(series,lag.max=12,plot=F)
  acfOfInterest=test$acf[2:13]  ## index 1 is lag 0; 2:13 are lags 1..12
  lagOfInterest=test$lag[2:13]
  maxLag=lagOfInterest[which.max(abs(acfOfInterest))]
  observed=acfOfInterest[which.max(abs(acfOfInterest))]
  ## One-sided empirical p-value in the direction of the observed extreme.
  ## NOTE(review): denominator is n while empP has n-1 entries, so pVal can be
  ## exactly 0 even though minPval reports 1/n -- kept as-is to preserve the
  ## original results; consider (count+1)/n.
  if(observed<0){
    pVal=length(which(empP<observed))/n
    
  }else{
    pVal=length(which(empP>observed))/n
  }
  minPval=1/n
  
  return(list(maxLag=maxLag,pVal=pVal,minPval=minPval))
  
}

## Example: strongest autocorrelation for D10 chlorophyll residuals is lag 1,
## far from significant under the circular-shift null.
acfTest(D10,"chl")
## $maxLag
## [1] 1
## 
## $pVal
## [1] 0.8064516
## 
## $minPval
## [1] 0.005376344
ccfTest=function(station1Data,station2Data,station1Nutrient,station2Nutrient){
  ## Permutation-style test for the strongest cross-correlation at positive
  ## lags 1-12 between two stations' GAM residual series. Station 2's series
  ## is circularly shifted to build the null distribution while station 1's
  ## series stays fixed.
  ## Returns list(maxLag, pVal, minPval) with minPval = 1/nrow(station2Data).
  
  ## Prefer the transformed residual column when one was created.
  if(sum(grepl(paste0(station1Nutrient,"Transform"),names(station1Data)))>0){
    varName=paste0("residGAM",station1Nutrient,"Transform")
  }else{
    varName=paste0("residGAM",station1Nutrient)
  }
  
  if(sum(grepl(paste0(station2Nutrient,"Transform"),names(station2Data)))>0){
    varName2=paste0("residGAM",station2Nutrient,"Transform")
  }else{
    varName2=paste0("residGAM",station2Nutrient)
  }
  
  n2=nrow(station2Data)
  series1=station1Data[,varName]
  series2=station2Data[,varName2]
  ## With lag.max=12, ccf()$acf holds lags -12..12 at positions 1..25, so
  ## indices 14:25 are the positive lags 1..12.
  ## Null distribution over all circular shifts of series2; preallocated via
  ## vapply instead of growing empP with c() inside the loop.
  empP=vapply(2:n2,function(i){
    shifted=series2[c(i:n2,1:(i-1))]
    cc=ccf(series1,shifted,lag.max=12,plot=F)$acf[14:25]
    cc[which.max(abs(cc))]
  },numeric(1))
  
  test=ccf(series1,series2,lag.max=12,plot=F)
  ccfOfInterest=test$acf[14:25]
  lagOfInterest=test$lag[14:25]
  maxLag=lagOfInterest[which.max(abs(ccfOfInterest))]
  observed=ccfOfInterest[which.max(abs(ccfOfInterest))]
  ## One-sided empirical p-value in the direction of the observed extreme.
  ## NOTE(review): denominator is n2 while empP has n2-1 entries, so pVal can
  ## be exactly 0 below the reported minPval -- kept to preserve the original
  ## results.
  if(observed<0){
    pVal=length(which(empP<observed))/n2
    
  }else{
    pVal=length(which(empP>observed))/n2
  }
  minPval=1/n2
  
  return(list(maxLag=maxLag,pVal=pVal,minPval=minPval))
}

## Example: strongest positive-lag cross-correlation between D10 and D12
## chlorophyll residuals.
ccfTest(D10,D12,"chl","chl")
## $maxLag
## [1] 12
## 
## $pVal
## [1] 0.2150538
## 
## $minPval
## [1] 0.005376344

All possible station/variable combinations: run the time-domain analysis automatically for all pairs.

## Enumerate every (station1, station2, var1, var2) combination (5*5*5*5 = 625)
## and split it into the four cases analysed below: within/across station,
## same/different variable.
stationNames<-c("D10","D12","D22","D26","D4")
varNames<-c("chl","do","pheo","sal","temp")

allCombo=expand.grid(stationNames,stationNames,varNames,varNames)
dim(allCombo)
head(allCombo)
names(allCombo)=c("station1","station2","var1","var2")

sameStation=allCombo$station1==allCombo$station2

withinStation=allCombo[sameStation,]
dim(withinStation) ## 125

withinStationSameVar=withinStation[withinStation$var1==withinStation$var2,]
dim(withinStationSameVar) ## 25

withinStationDiffVar=withinStation[withinStation$var1!=withinStation$var2,]
dim(withinStationDiffVar) ## 100

leftOver=allCombo[!sameStation,]
acrossStationSameVar=leftOver[leftOver$var1==leftOver$var2,]
dim(acrossStationSameVar) ## 100

leftOver2=leftOver[leftOver$var1!=leftOver$var2,]
dim(leftOver2)

## leftOver2 already excludes same-variable rows, so this filter is a no-op.
acrossStationDiffVar=leftOver2[leftOver2$var1!=leftOver2$var2,]
dim(acrossStationDiffVar) ## 400 same as leftOver2 as expected 

## Re-load the per-station data sets saved earlier and collect them into a
## list keyed by station name, so that station labels from the combination
## tables can be used to look up the matching data frame.
setwd("~/UC_Berkeley/Semester_4/timeSeries")
storeData=lapply(paste0(stationNames,"data.csv"),read.csv,stringsAsFactors=F)
names(storeData)=stationNames

## Keep the individual station objects around for the example calls.
D10<-storeData[["D10"]]
D12<-storeData[["D12"]]
D22<-storeData[["D22"]]
D26<-storeData[["D26"]]
D4<-storeData[["D4"]]

setwd("~/UC_Berkeley/Semester_4/timeSeries")
## Run the time-domain permutation test for every pair in each family and
## save the results. Two fixes versus the naive loops:
##  * storeData is indexed with as.character(): the combination tables hold
##    factors here, and indexing a list by a factor silently uses the factor's
##    integer code, not its label -- it only worked before because the factor
##    levels happen to be in the same order as storeData.
##  * rows are collected in a preallocated list and bound once with
##    do.call(rbind, ...) instead of growing a matrix with rbind() each pass.

## Across-station, same-variable pairs.
rows=vector("list",nrow(acrossStationSameVar))
for(i in seq_len(nrow(acrossStationSameVar))){
  res=ccfTest(storeData[[as.character(acrossStationSameVar$station1[i])]],
              storeData[[as.character(acrossStationSameVar$station2[i])]],
              as.character(acrossStationSameVar$var1[i]),as.character(acrossStationSameVar$var2[i]))
  rows[[i]]=c(res$maxLag,res$pVal,res$minPval)
  print(i)
}
acrossStationSameVarResults=do.call(rbind,rows)

write.csv(acrossStationSameVarResults,"acrossStationSameVarResults.csv",row.names=F)

## Across-station, different-variable pairs.
rows=vector("list",nrow(acrossStationDiffVar))
for(i in seq_len(nrow(acrossStationDiffVar))){
  res=ccfTest(storeData[[as.character(acrossStationDiffVar$station1[i])]],
              storeData[[as.character(acrossStationDiffVar$station2[i])]],
              as.character(acrossStationDiffVar$var1[i]),as.character(acrossStationDiffVar$var2[i]))
  rows[[i]]=c(res$maxLag,res$pVal,res$minPval)
  print(i)
}
acrossStationDiffVarResults=do.call(rbind,rows)

write.csv(acrossStationDiffVarResults,"acrossStationDiffVarResults.csv",row.names=F)

## Within-station, different-variable pairs.
rows=vector("list",nrow(withinStationDiffVar))
for(i in seq_len(nrow(withinStationDiffVar))){
  res=ccfTest(storeData[[as.character(withinStationDiffVar$station1[i])]],
              storeData[[as.character(withinStationDiffVar$station2[i])]],
              as.character(withinStationDiffVar$var1[i]),as.character(withinStationDiffVar$var2[i]))
  rows[[i]]=c(res$maxLag,res$pVal,res$minPval)
  print(i)
}
withinStationDiffVarResults=do.call(rbind,rows)
write.csv(withinStationDiffVarResults,"withinStationDiffVarResults.csv",row.names=F)

## don't actually use this, not one of the research questions
rows=vector("list",nrow(withinStationSameVar))
for(i in seq_len(nrow(withinStationSameVar))){
  res=acfTest(storeData[[as.character(withinStationSameVar$station1[i])]],as.character(withinStationSameVar$var1[i]))
  rows[[i]]=c(res$maxLag,res$pVal,res$minPval)
  print(i)
}
withinStationSameVarResults=do.call(rbind,rows)

write.csv(withinStationSameVarResults,"withinStationSameVarResults.csv",row.names=F)

Frequency domain

require(psd)
## Loading required package: psd
## Loaded psd (1.0.1) -- Adaptive multitaper spectrum estimation
ccfTestFreq=function(station1Data,station2Data,station1Nutrient,station2Nutrient){
  ## Cross-spectral analysis of two stations' GAM residual series: locate the
  ## frequency with the largest squared coherence and test it with the F
  ## approximation F = (df/2 - 1) * C/(1-C) ~ F(2, df-2).
  ## Returns list(maxPhase, maxCoh, maxFreq = period (1/frequency) at the
  ## coherence peak, pVal at that peak).

  ## Pick the transformed residual column when one exists, else the raw one.
  resolveVar=function(data,nutrient){
    if(any(grepl(paste0(nutrient,"Transform"),names(data)))){
      paste0("residGAM",nutrient,"Transform")
    }else{
      paste0("residGAM",nutrient)
    }
  }
  varName=resolveVar(station1Data,station1Nutrient)
  varName2=resolveVar(station2Data,station2Nutrient)

  ## (Prewhitening both series with psd::prewhiten before spectrum() was
  ## tried and did not change the results.)

  bivariate=cbind(station1Data[,varName],station2Data[,varName2])
  test=spectrum(bivariate,taper=.2,log="no",spans=c(16,16),demean=T,detrend=F,plot=F)

  peak=which.max(test$coh)
  fStat=test$coh/(1-test$coh)*(test$df/2-1)
  pVal=(1-pf(fStat,2,test$df-2))[peak]

  maxPhase=test$phase[peak]
  maxCoh=test$coh[peak]
  maxFreq=1/test$freq[peak]

  return(list(maxPhase=maxPhase,maxCoh=maxCoh,maxFreq=maxFreq,pVal=pVal))
}

## Example: peak coherence between D10 chl and D12 pheo residual spectra.
ccfTestFreq(D10,D12,"chl","pheo")
## $maxPhase
## [1] 1.251477
## 
## $maxCoh
## [1] 0.1523391
## 
## $maxFreq
## [1] 8.727273
## 
## $pVal
## [1] 0.0553945
acfTestFreq=function(stationData,nutrient){
  ## Univariate spectral peak for one station's GAM residual series: the
  ## height and period (1/frequency) of the largest smoothed-spectrum
  ## ordinate, plus a chi-squared tail probability at that peak.
  ## NOTE(review): comparing the smoothed spectrum value directly against
  ## chi^2_2 ignores the smoothing degrees of freedom -- kept to match the
  ## original analysis.
  if(any(grepl(paste0(nutrient,"Transform"),names(stationData)))){
    varName=paste0("residGAM",nutrient,"Transform")
  }else{
    varName=paste0("residGAM",nutrient)
  }

  test=spectrum(stationData[,varName],taper=.2,log="no",spans=c(16,16),demean=T,detrend=F,plot=F)
  peak=which.max(test$spec)

  maxSpec=test$spec[peak]
  maxFreq=1/test$freq[peak]
  pVal=(1-pchisq(test$spec,2))[peak]

  return(list(maxSpec=maxSpec,maxFreq=maxFreq,pVal=pVal))

}


## Example: spectral peak for D10 chlorophyll residuals.
acfTestFreq(D10,"chl")
## $maxSpec
## [1] 0.2237561
## 
## $maxFreq
## [1] 4
## 
## $pVal
## [1] 0.8941533

Automate checking all possible pairs in the frequency domain.

## Rebuild the combination tables for the frequency-domain runs. Using
## expand.grid(..., stringsAsFactors=FALSE) gives character columns up front,
## which replaces the twenty per-column as.character() conversion lines the
## original needed (factor columns are unsafe for name-indexing storeData).
stationNames<-c("D10","D12","D22","D26","D4")
varNames<-c("chl","do","pheo","sal","temp")

allCombo=expand.grid(stationNames,stationNames,varNames,varNames,stringsAsFactors=FALSE)
dim(allCombo)
## [1] 625   4
head(allCombo)
## (same 625 combinations as before; columns are character, not factor)
names(allCombo)=c("station1","station2","var1","var2")

withinStation=allCombo[which(allCombo$station1==allCombo$station2),]
dim(withinStation) ## 125
## [1] 125   4

withinStationSameVar=withinStation[which(withinStation$var1==withinStation$var2),]
dim(withinStationSameVar) ## 25
## [1] 25  4

withinStationDiffVar=withinStation[which(withinStation$var1!=withinStation$var2),]
dim(withinStationDiffVar) ## 100
## [1] 100   4

leftOver=allCombo[-which(allCombo$station1==allCombo$station2),]
acrossStationSameVar=leftOver[which(leftOver$var1==leftOver$var2),]
dim(acrossStationSameVar) ## 100
## [1] 100   4

leftOver2=leftOver[-which(leftOver$var1==leftOver$var2),]
dim(leftOver2)
## [1] 400   4
acrossStationDiffVar=leftOver2[which(leftOver2$var1!=leftOver2$var2),]
dim(acrossStationDiffVar) ## 400 same as leftOver2 as expected 
## [1] 400   4
setwd("~/UC_Berkeley/Semester_4/timeSeries")
## Frequency-domain coherence tests for each family of pairs. Rows are
## collected in a preallocated list and bound once with do.call(rbind, ...)
## instead of growing the result matrix with rbind() inside the loop.
## (Station columns are already character here, so name-indexing storeData
## is safe.)
rows=vector("list",nrow(acrossStationSameVar))
for(i in seq_len(nrow(acrossStationSameVar))){
  res=ccfTestFreq(storeData[[acrossStationSameVar$station1[i]]],storeData[[acrossStationSameVar$station2[i]]],
                  as.character(acrossStationSameVar$var1[i]),as.character(acrossStationSameVar$var2[i]))
  rows[[i]]=c(res$maxPhase,res$maxCoh,res$maxFreq,res$pVal)
  print(i)
}
acrossStationSameVarResults=do.call(rbind,rows)

write.csv(acrossStationSameVarResults,"acrossStationSameVarResultsFreq.csv",row.names=F)

rows=vector("list",nrow(acrossStationDiffVar))
for(i in seq_len(nrow(acrossStationDiffVar))){
  res=ccfTestFreq(storeData[[acrossStationDiffVar$station1[i]]],storeData[[acrossStationDiffVar$station2[i]]],
                  as.character(acrossStationDiffVar$var1[i]),as.character(acrossStationDiffVar$var2[i]))
  rows[[i]]=c(res$maxPhase,res$maxCoh,res$maxFreq,res$pVal)
  print(i)
}
acrossStationDiffVarResults=do.call(rbind,rows)

write.csv(acrossStationDiffVarResults,"acrossStationDiffVarResultsFreq.csv",row.names=F)

rows=vector("list",nrow(withinStationDiffVar))
for(i in seq_len(nrow(withinStationDiffVar))){
  res=ccfTestFreq(storeData[[withinStationDiffVar$station1[i]]],storeData[[withinStationDiffVar$station2[i]]],
                  as.character(withinStationDiffVar$var1[i]),as.character(withinStationDiffVar$var2[i]))
  rows[[i]]=c(res$maxPhase,res$maxCoh,res$maxFreq,res$pVal)
  print(i)
}
withinStationDiffVarResults=do.call(rbind,rows)
write.csv(withinStationDiffVarResults,"withinStationDiffVarResultsFreq.csv",row.names=F)

## don't use, not part of research question
rows=vector("list",nrow(withinStationSameVar))
for(i in seq_len(nrow(withinStationSameVar))){
  res=acfTestFreq(storeData[[withinStationSameVar$station1[i]]],as.character(withinStationSameVar$var1[i]))
  rows[[i]]=c(res$maxSpec,res$maxFreq,res$pVal)
  print(i)
}
withinStationSameVarResults=do.call(rbind,rows)

write.csv(withinStationSameVarResults,"withinStationSameVarResultsFreq.csv",row.names=F)

Finding significant pairs in time domain

## Load the time-domain permutation-test results saved above and screen for
## significant pairs after multiple-testing correction.
setwd("~/UC_Berkeley/Semester_4/timeSeries")
acrossStationSameVarResults<-read.csv("acrossStationSameVarResults.csv",stringsAsFactors=F)
acrossStationDiffVarResults<-read.csv("acrossStationDiffVarResults.csv",stringsAsFactors=F)
withinStationSameVarResults<-read.csv("withinStationSameVarResults.csv",stringsAsFactors=F)
withinStationDiffVarResults<-read.csv("withinStationDiffVarResults.csv",stringsAsFactors=F)

## The result matrices were written without column names; restore them.
names(acrossStationSameVarResults)=names(acrossStationDiffVarResults)=names(withinStationSameVarResults)=
names(withinStationDiffVarResults)=c("maxLag","pVal","minPval")

## first check to make sure we have availability to get significant p-values

summary(acrossStationSameVarResults$minPval) ## 0.005376  for all is the max min pval, so we are good
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
summary(acrossStationDiffVarResults$minPval)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
summary(withinStationSameVarResults$minPval)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
summary(withinStationDiffVarResults$minPval)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.005376 0.005376 0.005376 0.005376 0.005376 0.005376
## Raw (uncorrected) counts of p < 0.05 in each family of tests.
length(which(acrossStationSameVarResults$pVal<0.05)) ## 18
## [1] 18
nrow(acrossStationSameVarResults) ## 100
## [1] 100
length(which(acrossStationDiffVarResults$pVal<0.05)) ## 83
## [1] 83
nrow(acrossStationDiffVarResults) ## 400
## [1] 400
length(which(withinStationSameVarResults$pVal<0.05)) ## 2
## [1] 2
nrow(withinStationSameVarResults) ## 25
## [1] 25
length(which(withinStationDiffVarResults$pVal<0.05)) ## 23
## [1] 23
nrow(withinStationDiffVarResults) ## 100
## [1] 100
## Benjamini-Yekutieli correction per family; rows of the combination tables
## line up one-to-one with rows of the results matrices, which is how
## sigToPlot recovers the station/variable labels for each significant row.
sum(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 3
## 3

sigToPlot=acrossStationSameVar[which(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05),]
resToPlot=acrossStationSameVarResults[which(p.adjust(acrossStationSameVarResults$pVal, method = "BY") <0.05),]
sigToPlot
##     station1 station2 var1 var2
## 153      D22      D10   do   do
## 464      D26      D22  sal  sal
## 620       D4      D26 temp temp
resToPlot
##    maxLag pVal     minPval
## 22      6    0 0.005376344
## 71      1    0 0.005376344
## 96      6    0 0.005376344
sum(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 8
## 8

sigToPlot=acrossStationDiffVar[which(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05),]
resToPlot=acrossStationDiffVarResults[which(p.adjust(acrossStationDiffVarResults$pVal, method = "BY") <0.05),]
sigToPlot
##     station1 station2 var1 var2
## 36       D10      D22   do  chl
## 89       D26      D22  sal  chl
## 206      D10      D12  sal   do
## 240       D4      D22 temp   do
## 241      D10      D26 temp   do
## 440       D4      D22 pheo  sal
## 449      D26       D4 pheo  sal
## 547      D12       D4   do temp
resToPlot
##     maxLag pVal     minPval
## 9        2    0 0.005376344
## 51       2    0 0.005376344
## 125      4    0 0.005376344
## 152      4    0 0.005376344
## 153      7    0 0.005376344
## 292      3    0 0.005376344
## 300      3    0 0.005376344
## 358      8    0 0.005376344
sum(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05)## corrected p-val
## [1] 1
## 1

sigToPlot=withinStationDiffVar[which(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05),]
resToPlot=withinStationDiffVarResults[which(p.adjust(withinStationDiffVarResults$pVal, method = "BY") <0.05),]

sigToPlot
##     station1 station2 var1 var2
## 532      D12      D12   do temp
resToPlot
##    maxLag pVal     minPval
## 87      8    0 0.005376344

Find significant pairs in frequency domain

## Load the frequency-domain cross-spectral test results.
## NOTE(review): the hard-coded setwd() makes this script non-portable;
## consider relative paths or a project-root helper for reproducibility.
setwd("~/UC_Berkeley/Semester_4/timeSeries")
acrossStationSameVarResults <- read.csv("acrossStationSameVarResultsFreq.csv", stringsAsFactors = FALSE)
acrossStationDiffVarResults <- read.csv("acrossStationDiffVarResultsFreq.csv", stringsAsFactors = FALSE)
#withinStationSameVarResults <- read.csv("withinStationSameVarResultsFreq.csv", stringsAsFactors = FALSE)
withinStationDiffVarResults <- read.csv("withinStationDiffVarResultsFreq.csv", stringsAsFactors = FALSE)

## All three result tables share the same column layout:
## peak phase, peak coherence, frequency at the peak, and the test p-value.
freqResultCols <- c("maxPhase", "maxCoh", "maxFreq", "pVal")
names(acrossStationSameVarResults) <- freqResultCols
names(acrossStationDiffVarResults) <- freqResultCols
names(withinStationDiffVarResults) <- freqResultCols

## Uncorrected (raw p < 0.05) significance counts and p-value histograms
## for each comparison family in the frequency domain.
sum(acrossStationSameVarResults$pVal < 0.05, na.rm = TRUE) ## 100 pw 100
## [1] 100
nrow(acrossStationSameVarResults) ## 100
## [1] 100
hist(acrossStationSameVarResults$pVal)

sum(acrossStationDiffVarResults$pVal < 0.05, na.rm = TRUE) ## 266 pw 262
## [1] 266
nrow(acrossStationDiffVarResults) ## 400
## [1] 400
hist(acrossStationDiffVarResults$pVal)

sum(withinStationDiffVarResults$pVal < 0.05, na.rm = TRUE) ## 64 pw 64
## [1] 64
nrow(withinStationDiffVarResults) ## 100
## [1] 100
hist(withinStationDiffVarResults$pVal)

## Frequency domain, across-station same-variable pairs: BY-corrected FDR
## at 0.05. Compute the adjusted significance mask once and reuse it
## instead of re-running p.adjust() three times.
sigMaskSameFreq <- p.adjust(acrossStationSameVarResults$pVal, method = "BY") < 0.05
sum(sigMaskSameFreq)## corrected p-val
## [1] 90
## 90 pw 92
sigToPlot <- acrossStationSameVar[which(sigMaskSameFreq), ]
resToPlot <- acrossStationSameVarResults[which(sigMaskSameFreq), ]
sigToPlot
##     station1 station2 var1 var2
## 2        D12      D10  chl  chl
## 3        D22      D10  chl  chl
## 4        D26      D10  chl  chl
## 5         D4      D10  chl  chl
## 6        D10      D12  chl  chl
## 8        D22      D12  chl  chl
## 9        D26      D12  chl  chl
## 10        D4      D12  chl  chl
## 11       D10      D22  chl  chl
## 12       D12      D22  chl  chl
## 14       D26      D22  chl  chl
## 15        D4      D22  chl  chl
## 16       D10      D26  chl  chl
## 17       D12      D26  chl  chl
## 18       D22      D26  chl  chl
## 20        D4      D26  chl  chl
## 21       D10       D4  chl  chl
## 22       D12       D4  chl  chl
## 23       D22       D4  chl  chl
## 24       D26       D4  chl  chl
## 152      D12      D10   do   do
## 153      D22      D10   do   do
## 154      D26      D10   do   do
## 155       D4      D10   do   do
## 156      D10      D12   do   do
## 158      D22      D12   do   do
## 159      D26      D12   do   do
## 160       D4      D12   do   do
## 161      D10      D22   do   do
## 162      D12      D22   do   do
## 164      D26      D22   do   do
## 165       D4      D22   do   do
## 166      D10      D26   do   do
## 167      D12      D26   do   do
## 168      D22      D26   do   do
## 170       D4      D26   do   do
## 171      D10       D4   do   do
## 172      D12       D4   do   do
## 173      D22       D4   do   do
## 174      D26       D4   do   do
## 302      D12      D10 pheo pheo
## 303      D22      D10 pheo pheo
## 305       D4      D10 pheo pheo
## 306      D10      D12 pheo pheo
## 308      D22      D12 pheo pheo
## 310       D4      D12 pheo pheo
## 311      D10      D22 pheo pheo
## 312      D12      D22 pheo pheo
## 314      D26      D22 pheo pheo
## 315       D4      D22 pheo pheo
## 318      D22      D26 pheo pheo
## 320       D4      D26 pheo pheo
## 321      D10       D4 pheo pheo
## 322      D12       D4 pheo pheo
## 323      D22       D4 pheo pheo
## 324      D26       D4 pheo pheo
## 452      D12      D10  sal  sal
## 453      D22      D10  sal  sal
## 455       D4      D10  sal  sal
## 456      D10      D12  sal  sal
## 458      D22      D12  sal  sal
## 459      D26      D12  sal  sal
## 460       D4      D12  sal  sal
## 461      D10      D22  sal  sal
## 462      D12      D22  sal  sal
## 465       D4      D22  sal  sal
## 467      D12      D26  sal  sal
## 471      D10       D4  sal  sal
## 472      D12       D4  sal  sal
## 473      D22       D4  sal  sal
## 602      D12      D10 temp temp
## 603      D22      D10 temp temp
## 604      D26      D10 temp temp
## 605       D4      D10 temp temp
## 606      D10      D12 temp temp
## 608      D22      D12 temp temp
## 609      D26      D12 temp temp
## 610       D4      D12 temp temp
## 611      D10      D22 temp temp
## 612      D12      D22 temp temp
## 614      D26      D22 temp temp
## 615       D4      D22 temp temp
## 616      D10      D26 temp temp
## 617      D12      D26 temp temp
## 618      D22      D26 temp temp
## 620       D4      D26 temp temp
## 621      D10       D4 temp temp
## 622      D12       D4 temp temp
## 623      D22       D4 temp temp
## 624      D26       D4 temp temp
resToPlot
##          maxPhase    maxCoh    maxFreq         pVal
## 1   -1.614801e-01 0.4997432   4.363636 5.421407e-06
## 2    6.263012e-03 0.4735768 192.000000 1.323509e-05
## 3   -6.035021e-01 0.4704407   4.571429 1.468534e-05
## 4   -2.013778e-02 0.7349544 192.000000 8.030165e-11
## 5    1.614801e-01 0.4997432   4.363636 5.421407e-06
## 6    8.386601e-17 0.2905206   2.000000 2.457938e-03
## 7   -3.834747e-01 0.4161474  13.714286 8.108387e-05
## 8    3.003807e-02 0.6125318   4.682927 6.190318e-08
## 9   -6.263012e-03 0.4735768 192.000000 1.323509e-05
## 10  -1.536679e-16 0.2905206   2.000000 2.457938e-03
## 11  -2.742308e-01 0.2691103   2.865672 4.136264e-03
## 12  -2.505597e-02 0.5935356 192.000000 1.430868e-07
## 13   6.035021e-01 0.4704407   4.571429 1.468534e-05
## 14   3.834747e-01 0.4161474  13.714286 8.108387e-05
## 15   2.742308e-01 0.2691103   2.865672 4.136264e-03
## 16   5.987012e-01 0.5758708   3.918367 3.013217e-07
## 17   2.013778e-02 0.7349544 192.000000 8.030165e-11
## 18  -3.003807e-02 0.6125318   4.682927 6.190318e-08
## 19   2.505597e-02 0.5935356 192.000000 1.430868e-07
## 20  -5.987012e-01 0.5758708   3.918367 3.013217e-07
## 21  -7.752784e-02 0.7954245  16.000000 8.627543e-13
## 22  -9.803885e-02 0.7799103   8.347826 3.101963e-12
## 23   1.924453e-01 0.5921634  10.666667 1.517827e-07
## 24  -6.373451e-02 0.8672090  12.800000 4.440892e-16
## 25   7.752784e-02 0.7954245  16.000000 8.627543e-13
## 26   2.955340e-01 0.6851673   2.430380 1.634745e-09
## 27   1.299838e-01 0.5290375  11.294118 1.885132e-06
## 28  -2.502953e-02 0.7420414  12.000000 4.996537e-11
## 29   9.803885e-02 0.7799103   8.347826 3.101963e-12
## 30  -2.955340e-01 0.6851673   2.430380 1.634745e-09
## 31   3.474642e-01 0.5801154  12.800000 2.526745e-07
## 32   2.915141e-02 0.8602990  10.666667 1.110223e-15
## 33  -1.924453e-01 0.5921634  10.666667 1.517827e-07
## 34  -1.299838e-01 0.5290375  11.294118 1.885132e-06
## 35  -3.474642e-01 0.5801154  12.800000 2.526745e-07
## 36  -3.543822e-01 0.6098341  10.666667 6.989779e-08
## 37   6.373451e-02 0.8672090  12.800000 4.440892e-16
## 38   2.502953e-02 0.7420414  12.000000 4.996537e-11
## 39  -2.915141e-02 0.8602990  10.666667 1.110223e-15
## 40   3.543822e-01 0.6098341  10.666667 6.989779e-08
## 41   8.321366e-03 0.5811057 192.000000 2.424428e-07
## 42  -1.561057e-02 0.5732685 192.000000 3.353791e-07
## 44   1.082323e-01 0.4532215  12.800000 2.571347e-05
## 45  -8.321366e-03 0.5811057 192.000000 2.424428e-07
## 46  -7.617191e-01 0.3522855  10.666667 4.990069e-04
## 48  -1.306446e-01 0.4990255  11.294118 5.559183e-06
## 49   1.561057e-02 0.5732685 192.000000 3.353791e-07
## 50   7.617191e-01 0.3522855  10.666667 4.990069e-04
## 51  -1.177906e-02 0.3005925 192.000000 1.913675e-03
## 52   3.093948e-01 0.4842186   6.193548 9.256798e-06
## 55   1.177906e-02 0.3005925 192.000000 1.913675e-03
## 56   7.132110e-01 0.3179848   4.465116 1.231461e-03
## 57  -1.082323e-01 0.4532215  12.800000 2.571347e-05
## 58   1.306446e-01 0.4990255  11.294118 5.559183e-06
## 59  -3.093948e-01 0.4842186   6.193548 9.256798e-06
## 60  -7.132110e-01 0.3179848   4.465116 1.231461e-03
## 61  -2.848852e-01 0.8453521   6.620690 6.439294e-15
## 62   2.655789e-01 0.5182194   5.818182 2.805440e-06
## 64  -1.614712e-01 0.8879604   6.857143 0.000000e+00
## 65   2.848852e-01 0.8453521   6.620690 6.439294e-15
## 66  -4.474455e-16 0.6570138   2.000000 7.321721e-09
## 67   6.537201e-03 0.3824503 192.000000 2.165359e-04
## 68   2.036028e-01 0.8218177   9.142857 7.682743e-14
## 69  -2.655789e-01 0.5182194   5.818182 2.805440e-06
## 70  -5.435023e-16 0.6570138   2.000000 7.321721e-09
## 72  -2.188512e-01 0.5963865   2.370370 1.264987e-07
## 74  -6.537201e-03 0.3824503 192.000000 2.165359e-04
## 77   1.614712e-01 0.8879604   6.857143 0.000000e+00
## 78  -2.036028e-01 0.8218177   9.142857 7.682743e-14
## 79   2.188512e-01 0.5963865   2.370370 1.264987e-07
## 81  -1.306025e-01 0.8755150  96.000000 2.220446e-16
## 82  -6.145561e-02 0.8279414  19.200000 4.174439e-14
## 83  -1.827559e-01 0.9282613  38.400000 0.000000e+00
## 84  -1.118319e-01 0.9404211  24.000000 0.000000e+00
## 85   1.306025e-01 0.8755150  96.000000 2.220446e-16
## 86   1.114711e-01 0.7349132   4.800000 8.052048e-11
## 87  -5.574506e-05 0.9272318  96.000000 0.000000e+00
## 88   6.880169e-02 0.9204743  96.000000 0.000000e+00
## 89   6.145561e-02 0.8279414  19.200000 4.174439e-14
## 90  -1.114711e-01 0.7349132   4.800000 8.052048e-11
## 91  -1.588070e-01 0.8319947  64.000000 2.753353e-14
## 92  -3.018674e-02 0.8831900  19.200000 0.000000e+00
## 93   1.827559e-01 0.9282613  38.400000 0.000000e+00
## 94   5.574506e-05 0.9272318  96.000000 0.000000e+00
## 95   1.588070e-01 0.8319947  64.000000 2.753353e-14
## 96   9.053966e-02 0.9466961  64.000000 0.000000e+00
## 97   1.118319e-01 0.9404211  24.000000 0.000000e+00
## 98  -6.880169e-02 0.9204743  96.000000 0.000000e+00
## 99   3.018674e-02 0.8831900  19.200000 0.000000e+00
## 100 -9.053966e-02 0.9466961  64.000000 0.000000e+00
## Frequency domain, across-station different-variable pairs: BY-corrected
## FDR at 0.05. Hoist the adjusted significance mask so p.adjust() runs once.
sigMaskDiffFreq <- p.adjust(acrossStationDiffVarResults$pVal, method = "BY") < 0.05
sum(sigMaskDiffFreq)## corrected p-val
## [1] 60
sigToPlot <- acrossStationDiffVar[which(sigMaskDiffFreq), ]
resToPlot <- acrossStationDiffVarResults[which(sigMaskDiffFreq), ]
sigToPlot
##     station1 station2 var1 var2
## 37       D12      D22   do  chl
## 60        D4      D12 pheo  chl
## 65        D4      D22 pheo  chl
## 77       D12      D10  sal  chl
## 78       D22      D10  sal  chl
## 97       D12       D4  sal  chl
## 98       D22       D4  sal  chl
## 133      D22      D12  chl   do
## 210       D4      D12  sal   do
## 227      D12      D10 temp   do
## 228      D22      D10 temp   do
## 229      D26      D10 temp   do
## 230       D4      D10 temp   do
## 231      D10      D12 temp   do
## 233      D22      D12 temp   do
## 234      D26      D12 temp   do
## 235       D4      D12 temp   do
## 236      D10      D22 temp   do
## 237      D12      D22 temp   do
## 239      D26      D22 temp   do
## 240       D4      D22 temp   do
## 245       D4      D26 temp   do
## 246      D10       D4 temp   do
## 247      D12       D4 temp   do
## 248      D22       D4 temp   do
## 249      D26       D4 temp   do
## 272      D12       D4  chl pheo
## 273      D22       D4  chl pheo
## 336      D10      D22  sal pheo
## 337      D12      D22  sal pheo
## 340       D4      D22  sal pheo
## 381      D10      D12  chl  sal
## 385       D4      D12  chl  sal
## 386      D10      D22  chl  sal
## 390       D4      D22  chl  sal
## 422      D12       D4   do  sal
## 428      D22      D10 pheo  sal
## 433      D22      D12 pheo  sal
## 448      D22       D4 pheo  sal
## 491      D10      D26 temp  sal
## 495       D4      D26 temp  sal
## 527      D12      D10   do temp
## 528      D22      D10   do temp
## 530       D4      D10   do temp
## 531      D10      D12   do temp
## 533      D22      D12   do temp
## 535       D4      D12   do temp
## 536      D10      D22   do temp
## 537      D12      D22   do temp
## 540       D4      D22   do temp
## 541      D10      D26   do temp
## 542      D12      D26   do temp
## 543      D22      D26   do temp
## 545       D4      D26   do temp
## 546      D10       D4   do temp
## 547      D12       D4   do temp
## 548      D22       D4   do temp
## 549      D26       D4   do temp
## 579      D26      D10  sal temp
## 599      D26       D4  sal temp
resToPlot
##       maxPhase    maxCoh    maxFreq         pVal
## 10  -0.6223666 0.3717467   3.368421 2.925321e-04
## 28  -0.3005707 0.3852084   5.485714 2.002159e-04
## 32   0.0459174 0.5442074   2.064516 1.062734e-06
## 41   3.1385178 0.4330774 192.000000 4.844168e-05
## 42   3.1018280 0.3212908 192.000000 1.131038e-03
## 58  -3.1226113 0.4864547 192.000000 8.578843e-06
## 59   3.1205941 0.5968667 192.000000 1.238900e-07
## 86   0.6223666 0.3717467   3.368421 2.925321e-04
## 128 -0.8580222 0.3780607   6.400000 2.451202e-04
## 141 -3.1415927 0.5101513   2.000000 3.752020e-06
## 142 -3.0575810 0.4698239   8.347826 1.498769e-05
## 143 -2.9996736 0.4979480   8.347826 5.772256e-06
## 144 -3.0279939 0.5271913   7.111111 2.018767e-06
## 145 -3.0882514 0.3788031  13.714286 2.400485e-04
## 146 -3.0440486 0.4072159  12.000000 1.057685e-04
## 147  2.8842902 0.4449997   2.742857 3.339104e-05
## 148  3.0020436 0.4313440   2.742857 5.110095e-05
## 149 -3.1415927 0.5047162   2.000000 4.551507e-06
## 150  3.1415927 0.4936105   2.000000 6.710374e-06
## 151 -3.1415927 0.4774100   2.000000 1.164568e-05
## 152 -3.1415927 0.6850216   2.000000 1.648045e-09
## 156  2.9485047 0.4048876   2.704225 1.132816e-04
## 157 -3.0609952 0.4760218  12.800000 1.219926e-05
## 158  3.1415927 0.4672412   2.000000 1.631854e-05
## 159 -3.0975206 0.4584543   8.347826 2.172956e-05
## 160 -3.1415927 0.5404287   2.000000 1.227987e-06
## 178  0.3005707 0.3852084   5.485714 2.002159e-04
## 179 -0.0459174 0.5442074   2.064516 1.062734e-06
## 209 -0.6312983 0.3602177  12.800000 4.021844e-04
## 210 -1.0720948 0.4046958  14.769231 1.139224e-04
## 212 -0.6994686 0.3736429  12.800000 2.774552e-04
## 245 -3.1385178 0.4330774 192.000000 4.844168e-05
## 248  3.1226113 0.4864547 192.000000 8.578843e-06
## 249 -3.1018280 0.3212908 192.000000 1.131038e-03
## 252 -3.1205941 0.5968667 192.000000 1.238900e-07
## 278  0.8580222 0.3780607   6.400000 2.451202e-04
## 282  0.6312983 0.3602177  12.800000 4.021844e-04
## 286  1.0720948 0.4046958  14.769231 1.139224e-04
## 299  0.6994686 0.3736429  12.800000 2.774552e-04
## 313  2.5032456 0.3772504  12.800000 2.507716e-04
## 316  2.3215352 0.3481116  12.800000 5.583948e-04
## 341  3.0882514 0.3788031  13.714286 2.400485e-04
## 342  3.1415927 0.5047162   2.000000 4.551507e-06
## 344  3.0609952 0.4760218  12.800000 1.219926e-05
## 345  3.1415927 0.5101513   2.000000 3.752020e-06
## 346  3.1415927 0.4936105   2.000000 6.710374e-06
## 348 -3.1415927 0.4672412   2.000000 1.631854e-05
## 349  3.0575810 0.4698239   8.347826 1.498769e-05
## 350  3.0440486 0.4072159  12.000000 1.057685e-04
## 352  3.0975206 0.4584543   8.347826 2.172956e-05
## 353  2.9996736 0.4979480   8.347826 5.772256e-06
## 354 -2.8842902 0.4449997   2.742857 3.339104e-05
## 355 -3.1415927 0.4774100   2.000000 1.164568e-05
## 356 -3.1415927 0.5404287   2.000000 1.227987e-06
## 357  3.0279939 0.5271913   7.111111 2.018767e-06
## 358 -3.0020436 0.4313440   2.742857 5.110095e-05
## 359  3.1415927 0.6850216   2.000000 1.648045e-09
## 360 -2.9485047 0.4048876   2.704225 1.132816e-04
## 383 -2.5032456 0.3772504  12.800000 2.507716e-04
## 400 -2.3215352 0.3481116  12.800000 5.583948e-04
## Frequency domain, within-station different-variable pairs: BY-corrected
## FDR at 0.05. Hoist the adjusted significance mask so p.adjust() runs once.
sigMaskWithinFreq <- p.adjust(withinStationDiffVarResults$pVal, method = "BY") < 0.05
sum(sigMaskWithinFreq)## corrected p-val
## [1] 26
sigToPlot <- withinStationDiffVar[which(sigMaskWithinFreq), ]
resToPlot <- withinStationDiffVarResults[which(sigMaskWithinFreq), ]
sigToPlot
##     station1 station2 var1 var2
## 51       D10      D10 pheo  chl
## 57       D12      D12 pheo  chl
## 63       D22      D22 pheo  chl
## 69       D26      D26 pheo  chl
## 75        D4       D4 pheo  chl
## 207      D12      D12  sal   do
## 226      D10      D10 temp   do
## 232      D12      D12 temp   do
## 238      D22      D22 temp   do
## 244      D26      D26 temp   do
## 250       D4       D4 temp   do
## 251      D10      D10  chl pheo
## 257      D12      D12  chl pheo
## 263      D22      D22  chl pheo
## 269      D26      D26  chl pheo
## 275       D4       D4  chl pheo
## 363      D22      D22 temp pheo
## 407      D12      D12   do  sal
## 494      D26      D26 temp  sal
## 526      D10      D10   do temp
## 532      D12      D12   do temp
## 538      D22      D22   do temp
## 544      D26      D26   do temp
## 550       D4       D4   do temp
## 563      D22      D22 pheo temp
## 594      D26      D26  sal temp
resToPlot
##       maxPhase    maxCoh   maxFreq         pVal
## 6   0.03230866 0.4691372  2.704225 1.533119e-05
## 7   0.07122826 0.3280448  3.622642 9.493838e-04
## 8  -0.29063634 0.4842777  5.818182 9.238245e-06
## 9  -0.22025085 0.3208126 24.000000 1.145071e-03
## 10 -0.47971020 0.5922786  5.647059 1.510341e-07
## 32 -0.93897895 0.2945916  7.111111 2.222397e-03
## 36 -3.00530632 0.4878493 12.000000 8.180023e-06
## 37 -3.03138569 0.3923411  2.086957 1.632216e-04
## 38 -3.10075220 0.4984476  7.111111 5.672513e-06
## 39 -2.93436039 0.4326341  6.193548 4.910905e-05
## 40 -3.14159265 0.6418548  2.000000 1.561047e-08
## 41 -0.03230866 0.4691372  2.704225 1.533119e-05
## 42 -0.07122826 0.3280448  3.622642 9.493838e-04
## 43  0.29063634 0.4842777  5.818182 9.238245e-06
## 44  0.22025085 0.3208126 24.000000 1.145071e-03
## 45  0.47971020 0.5922786  5.647059 1.510341e-07
## 58 -1.42010544 0.3314213  2.341463 8.692469e-04
## 67  0.93897895 0.2945916  7.111111 2.222397e-03
## 79  2.22431284 0.3009051 12.800000 1.898757e-03
## 86  3.00530632 0.4878493 12.000000 8.180023e-06
## 87  3.03138569 0.3923411  2.086957 1.632216e-04
## 88  3.10075220 0.4984476  7.111111 5.672513e-06
## 89  2.93436039 0.4326341  6.193548 4.910905e-05
## 90 -3.14159265 0.6418548  2.000000 1.561047e-08
## 93  1.42010544 0.3314213  2.341463 8.692469e-04
## 99 -2.22431284 0.3009051 12.800000 1.898757e-03